pacman::p_load(tidyverse, ggplot2, tsibble, tsibbledata, fable, fabletools, feasts,
plotly, DT, fable.prophet, seasonal)Take_Home Exercise 3: Prototyping Modules for Visual Analytics Shiny Application
Visualising and Analysing Time-oriented Data
1. Overview
For this take-home exercise 3, we are required to select one module from our proposed Shiny application and complete the following tasks:
Identify and verify the R packages required for the selected module, ensuring they are available and supported on CRAN.
Develop and test the corresponding R code to ensure it executes correctly and produces the expected results.
Define the key parameters and outputs that will be exposed within the Shiny application interface.
Choose suitable Shiny UI components for presenting the identified parameters and outputs.
The focus for this submission will be on the univariate forecasting module of the application.
2. Getting Started
2.1 Installing and loading the required libraries
For the purpose of this hands-on exercise, the following R packages will be used.
3. Data Wrangling
As this take-home exercise focuses on the analysis of a single module within the overall project, the data preparation process has been carried out once to maintain consistency across all team members. The shared dataset, weather_data_updated.csv, will be used uniformly throughout. A detailed explanation of the data preparation steps can be accessed via the provided link.
3.1 Importing data
The processed data has been imported using read_csv.
weather <- read_csv("data/weather_data_cleaned.csv")pacman::p_load(tidyverse, naniar, lubridate, imputeTS)
library(data.table)
vis_miss(weather)
head(weather)# A tibble: 6 × 9
Region Station Date `Daily Rainfall Total (mm)` Mean Temperature (°C…¹
<chr> <chr> <date> <dbl> <dbl>
1 North Admiralty 2020-01-01 0 27.5
2 North Admiralty 2020-01-02 0 27.7
3 North Admiralty 2020-01-03 0 27.9
4 North Admiralty 2020-01-04 0.4 29.2
5 North Admiralty 2020-01-05 8.6 26.3
6 North Admiralty 2020-01-06 10 26.4
# ℹ abbreviated name: ¹`Mean Temperature (°C)`
# ℹ 4 more variables: `Maximum Temperature (°C)` <dbl>,
# `Minimum Temperature (°C)` <dbl>, Latitude <dbl>, Longitude <dbl>
3.2 Convert to tsibble data frame
The code chunk below converting ts_data from tibble object into tsibble object by using as_tsibble() of tsibble R package.
weather_tsbl <- as_tsibble(weather, key = Station, index = Date)
weather_tsbl# A tsibble: 30,532 x 9 [1D]
# Key: Station [17]
Region Station Date Daily Rainfall Total (mm…¹ Mean Temperature (°C…²
<chr> <chr> <date> <dbl> <dbl>
1 North Admiralty 2020-01-01 0 27.5
2 North Admiralty 2020-01-02 0 27.7
3 North Admiralty 2020-01-03 0 27.9
4 North Admiralty 2020-01-04 0.4 29.2
5 North Admiralty 2020-01-05 8.6 26.3
6 North Admiralty 2020-01-06 10 26.4
7 North Admiralty 2020-01-07 5.6 28.7
8 North Admiralty 2020-01-08 1 28.4
9 North Admiralty 2020-01-09 1.4 28.9
10 North Admiralty 2020-01-10 8 26.5
# ℹ 30,522 more rows
# ℹ abbreviated names: ¹`Daily Rainfall Total (mm)`, ²`Mean Temperature (°C)`
# ℹ 4 more variables: `Maximum Temperature (°C)` <dbl>,
# `Minimum Temperature (°C)` <dbl>, Latitude <dbl>, Longitude <dbl>
Station <- weather_tsbl %>% distinct(Station)
datatable(Station,
class= "compact",
rownames = FALSE,
width="100%",
options = list(pageLength = 12,scrollX=T))4. Section 1: Visualising Time Series Data
selected_stations <- unique(weather_tsbl$Station)
variable_temp <- "Mean Temperature (°C)"
variable_rain <- "Daily Rainfall Total (mm)"
start_date <- "2020-01-01"
end_date <- "2024-12-31"filtered_period <- weather_tsbl %>%
filter_index(start_date ~ end_date)
filtered_period_mstn <- filtered_period %>%
filter(Station %in% selected_stations)
plot_ly(filtered_period_mstn,
x = ~Date,
y = as.formula(paste0("~`", variable_temp, "`")),
type = 'scatter',
mode = 'lines',
color = ~Station,
hoverinfo = 'text',
text = ~paste("<b>Station:</b>", Station,
"<br><b>Date:</b>", Date,
"<br><b>", variable_temp, ":</b>", filtered_period_mstn[[variable_temp]])) %>%
layout(title = paste(variable_temp, "by Station"),
xaxis = list(title = ""),
yaxis = list(title = "")) %>%
layout(xaxis = list(rangeslider = list(type = "date"))) The code chunk below plot line graph for weekly summarise view of the following variables: “Mean Temperature (°C)”
summarised_temp_week <- filtered_period %>%
group_by_key() %>%
index_by(year_week = ~ yearweek(.)) %>%
summarise(
avg_temp = round(mean(.data[[variable_temp]]),2), na.rm = TRUE)
summarised_temp_mstn <- summarised_temp_week %>%
filter(Station %in% selected_stations) summarised_temp_mstn <- summarised_temp_mstn %>%
mutate(week_start_date = floor_date(as.Date(year_week), unit = "week"))
plot_ly(summarised_temp_mstn, x = ~week_start_date, y = ~avg_temp,
type = 'scatter', mode = 'lines', color = ~Station,
hoverinfo = 'text',
text = ~paste("<b>Station:</b>", Station,
"<br><b>Week Starting:</b>", week_start_date,
"<br><b>Average", variable_temp, ":</b>", avg_temp)) %>%
layout(title = paste("Weekly Average", variable_temp, "by Station"),
xaxis = list(title = ""),
yaxis = list(title = "")) %>%
layout(xaxis = list(rangeslider = list(type = "date"))) Line graph for monthly summarise view of the following variables: “Mean Temperature (°C)”
summarised_temp_month <- filtered_period %>%
group_by_key() %>%
index_by(year_month = ~ yearmonth(.)) %>%
summarise(
avg_temp = round(mean(.data[[variable_temp]]),2), na.rm = TRUE)
summarised_temp_mstn <- summarised_temp_month %>%
filter(Station %in% selected_stations) summarised_temp_mstn <- summarised_temp_mstn %>%
mutate(month_start_date = floor_date(as.Date(year_month), unit = "month"))
plot_ly(summarised_temp_mstn, x = ~month_start_date, y = ~avg_temp,
type = 'scatter', mode = 'lines', color = ~Station,
hoverinfo = 'text',
text = ~paste("<b>Station:</b>", Station,
"<br><b>Month:</b>", year_month,
"<br><b>Average", variable_temp, ":</b>", avg_temp)) %>%
layout(title = paste("Monthly Average", variable_temp, "by Station"),
xaxis = list(title = ""),
yaxis = list(title = "")) %>%
layout(xaxis = list(rangeslider = list(type = "date"))) Variable - Daily Rainfall Total (mm) Time resolution - week
summarised_rain_week <- filtered_period %>%
group_by_key() %>%
index_by(year_week = ~ yearweek(.)) %>%
summarise(
total_rainfall = sum(.data[[variable_rain]]), na.rm = TRUE)
summarised_rain_mstn <- summarised_rain_week %>%
filter(Station %in% selected_stations) summarised_rain_mstn <- summarised_rain_mstn %>%
mutate(week_start_date = floor_date(as.Date(year_week), unit = "week"))
plot_ly(summarised_rain_mstn, x = ~week_start_date, y = ~total_rainfall,
type = 'scatter', mode = 'lines', color = ~Station,
hoverinfo = 'text',
text = ~paste("<b>Station:</b>", Station,
"<br><b>Week Starting:</b>", week_start_date,
"<br><b>Total rainfall :</b>", total_rainfall, "mm")) %>%
layout(title = paste("Weekly", variable_rain, "by Station"),
xaxis = list(title = ""),
yaxis = list(title = "")) %>%
layout(xaxis = list(rangeslider = list(type = "date"))) Variable - Daily Rainfall Total (mm) Time resolution - Month
summarised_rain_month <- filtered_period %>%
group_by_key() %>%
index_by(year_month = ~ yearmonth(.)) %>%
summarise(
total_rainfall = sum(.data[[variable_rain]]), na.rm = TRUE)
summarised_rain_mstn <- summarised_rain_month %>%
filter(Station %in% selected_stations) summarised_rain_mstn <- summarised_rain_mstn %>%
mutate(month_start_date = floor_date(as.Date(year_month), unit = "month"))
plot_ly(summarised_rain_mstn, x = ~~month_start_date, y = ~total_rainfall,
type = 'scatter', mode = 'lines', color = ~Station,
hoverinfo = 'text',
text = ~paste("<b>Station:</b>", Station,
"<br><b>Month:</b>", year_month,
"<br><b>Total rainfall :</b>", total_rainfall, "mm")) %>%
layout(title = paste("Monthly", variable_rain, "by Station"),
xaxis = list(title = ""),
yaxis = list(title = "")) %>%
layout(xaxis = list(rangeslider = list(type = "date"))) 5. Section 2: Time Series Decomposition and auto correlation
This section presents the STL decomposition method — a powerful and flexible technique for breaking down time series data into three key components: trend, seasonality, and remainder.
STL stands for Seasonal and Trend decomposition using Loess, where Loess refers to a smoothing technique used to model nonlinear relationships. By applying STL, we can gain deeper insights into the underlying structure of our time series data, making it easier to interpret patterns and support accurate forecasting.
The plots below illustrate STL decomposition results using various tuning parameters, demonstrating how parameter adjustments influence the decomposition outcome.
5.1 ACF & PACF of Mean Temperature (°C)
ACF Daily Mean Temperature for Different Region
single_region <- "North"filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
ACF <- filtered_period_sreg_filled %>%
ACF(.data[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("ACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(ACF)single_region <- "East"filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
ACF <- filtered_period_sreg_filled %>%
ACF(.data[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("ACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(ACF)single_region <- "Northeast"filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
ACF <- filtered_period_sreg_filled %>%
ACF(.data[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("ACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(ACF)single_region <- "Central"filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
ACF <- filtered_period_sreg_filled %>%
ACF(.data[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("ACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(ACF)single_region <- "West"# Filter data for the selected region
filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
ACF <- filtered_period_sreg_filled %>%
ACF(.data[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("ACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(ACF)PACF Daily Mean Temperature for differnt Region
single_region <- "North"
filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
PACF <- filtered_period_sreg_filled %>%
PACF(filtered_period_sreg_filled[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("PACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(PACF)single_region <- "East"
filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
PACF <- filtered_period_sreg_filled %>%
PACF(filtered_period_sreg_filled[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("PACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(PACF)single_region <- "Northeast"
filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
PACF <- filtered_period_sreg_filled %>%
PACF(filtered_period_sreg_filled[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("PACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(PACF)single_region <- "Central"
filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
PACF <- filtered_period_sreg_filled %>%
PACF(filtered_period_sreg_filled[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("PACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(PACF)single_region <- "West"
filtered_period_sreg <- filtered_period %>%
filter(Region == single_region)
filtered_period_sreg_filled <- filtered_period_sreg %>%
fill_gaps()
PACF <- filtered_period_sreg_filled %>%
PACF(filtered_period_sreg_filled[[variable_temp]], lag_max = 100) %>%
autoplot() +
labs(title = paste("PACF plot of", variable_temp, "for Region:", single_region)) +
theme_minimal()
ggplotly(PACF)filtered_period# A tsibble: 30,532 x 9 [1D]
# Key: Station [17]
Region Station Date Daily Rainfall Total (mm…¹ Mean Temperature (°C…²
<chr> <chr> <date> <dbl> <dbl>
1 North Admiralty 2020-01-01 0 27.5
2 North Admiralty 2020-01-02 0 27.7
3 North Admiralty 2020-01-03 0 27.9
4 North Admiralty 2020-01-04 0.4 29.2
5 North Admiralty 2020-01-05 8.6 26.3
6 North Admiralty 2020-01-06 10 26.4
7 North Admiralty 2020-01-07 5.6 28.7
8 North Admiralty 2020-01-08 1 28.4
9 North Admiralty 2020-01-09 1.4 28.9
10 North Admiralty 2020-01-10 8 26.5
# ℹ 30,522 more rows
# ℹ abbreviated names: ¹`Daily Rainfall Total (mm)`, ²`Mean Temperature (°C)`
# ℹ 4 more variables: `Maximum Temperature (°C)` <dbl>,
# `Minimum Temperature (°C)` <dbl>, Latitude <dbl>, Longitude <dbl>
5.2 STL Decomposition analysis
STL is a robust method of time series decomposition often used in economic and environmental analyses. The STL method uses locally fitted regression models to decompose a time series into trend, seasonal, and remainder components.
The STL algorithm performs smoothing on the time series using LOESS in two loops; the inner loop iterates between seasonal and trend smoothing and the outer loop minimizes the effect of outliers. During the inner loop, the seasonal component is calculated first and removed to calculate the trend component. The remainder is calculated by subtracting the seasonal and trend components from the time series.
single_station <- "Ang Mo Kio"
filtered_period_sstn <- filtered_period %>%
filter(Station %in% single_station)
filled_data <- filtered_period_sstn %>%
as_tsibble(index = Date) %>%
fill_gaps() %>%
mutate(
`Mean Temperature (°C)` = imputeTS::na_interpolation(`Mean Temperature (°C)`)
)stl_formula_default <- STL(`Mean Temperature (°C)`)
stl_formula_min <- STL(`Mean Temperature (°C)`
~ trend(window = 1) +
~ season(window = 1))
stl_formula_max <- STL(`Mean Temperature (°C)`
~ trend(window = 365) +
~ season(window = 365))stl_default <- filled_data %>%
model(stl_formula_default) %>%
components() stl_default_tibble <- as_tibble(stl_default) %>%
select(Date, `Mean Temperature (°C)`, trend, season_week, season_year, remainder, season_adjust) %>%
mutate(trend = round(trend, 2),
season_adjust = round(season_adjust, 2),
season_week = round(season_week, 4),
season_year = round(season_year, 4),
remainder = round(remainder, 4))
datatable(stl_default_tibble,
class= "nowrap",
rownames = FALSE,
filter = 'top', # Enables filters at the top of each column
width="100%",
options = list(pageLength = 10, scrollX = TRUE))plot_stl_default <- stl_default %>%
autoplot()
ggplotly(plot_stl_default) %>% layout(width = 700, height = 700,
plot_bgcolor="#edf2f7")stl_min <- filled_data %>%
model(stl_formula_min) %>%
components()stl_min_tibble <- as_tibble(stl_min) %>%
select(Date, `Mean Temperature (°C)`, trend, season_week, season_year, remainder, season_adjust) %>%
mutate(trend = round(trend, 2),
season_adjust = round(season_adjust, 2),
season_week = round(season_week, 4),
season_year = round(season_year, 4),
remainder = round(remainder, 4))
datatable(stl_min_tibble,
class= "nowrap",
rownames = FALSE,
filter = 'top', # Enables filters at the top of each column
width="100%",
options = list(pageLength = 10, scrollX = TRUE))plot_stl_min <- stl_min %>%
autoplot()
ggplotly(plot_stl_min) %>% layout(width = 700, height = 700,
plot_bgcolor="#edf2f7")stl_max <- filled_data %>%
model(stl_formula_max) %>%
components()stl_max_tibble <- as_tibble(stl_max) %>%
select(Date, `Mean Temperature (°C)`, trend, season_week, season_year, remainder, season_adjust) %>%
mutate(trend = round(trend, 2),
season_adjust = round(season_adjust, 2),
season_week = round(season_week, 4),
season_year = round(season_year, 4),
remainder = round(remainder, 4))
datatable(stl_max_tibble,
class= "nowrap",
rownames = FALSE,
filter = 'top', # Enables filters at the top of each column
width="100%",
options = list(pageLength = 10, scrollX = TRUE))plot_stl_max <- stl_max %>%
autoplot()
ggplotly(plot_stl_max) %>% layout(width = 700, height = 700,
plot_bgcolor="#edf2f7")The STL decomposition plot is divided into four panels. The top panel displays the original time series, while the bottom three panels break it down into its individual components: trend, seasonality, and remainder. These components—when added together—reconstruct the original data shown in the top panel. The remainder represents the residuals left after removing both the seasonal and trend-cycle effects from the data.
6. Time Series Forecasting (Daily)
6.1 Split data into training and testing
# Define the split point; for example, keeping the first 80% of rows for training
split_point <- nrow(filled_data) * 0.8
# Create the training dataset (first 80% of the data)
train_daily <- filled_data %>%
slice(1:floor(split_point))
# Create the test dataset (remaining 20% of the data)
test_daily <- filled_data %>%
slice((floor(split_point) + 1):n())6.2 Create and fit multiple model to tesing set
train_daily_fit <- train_daily %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(stl_formula_default, NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(stl_formula_default, ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(stl_formula_default, ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(`Mean Temperature (°C)`),
# AUTO prophet
AUTOprophet = prophet(`Mean Temperature (°C)`),
# Auto Exponential smoothing
AUTOETS = ETS(`Mean Temperature (°C)`)
)
forecast_horizon <- nrow(test_daily)
# Forecasting
train_daily_fc <- forecast(train_daily_fit, h = forecast_horizon)
# Plotting the forecasts
c <- autoplot(train_daily, `Mean Temperature (°C)`) +
autolayer(test_daily, `Mean Temperature (°C)`, series = "Test Data") +
autolayer(train_daily_fc, level = NULL) +
labs(title = "Forecast Validation for daily mean temperature of Ang Mo Kio Station") +
theme_minimal()
ggplotly(c, tooltip = c("x", "y", ".model"))6.3 Testing set forcast & Accuracy Evaluation
accuracy_metrics <- accuracy(train_daily_fc, test_daily) %>%
arrange(.model) %>%
select(.model, .type, RMSE, MAE, MAPE, MASE) %>%
mutate(across(c(RMSE, MAE, MAPE, MASE), round, 2))
datatable(accuracy_metrics,
class= "hover",
rownames = FALSE,
width="100%",
options = list(pageLength = 10,scrollX=T))6.3.1 Residual plot
residual <- train_daily %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(stl_formula_default, NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(stl_formula_default, ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(stl_formula_default, ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(`Mean Temperature (°C)`),
# AUTO prophet
AUTOprophet = prophet(`Mean Temperature (°C)`),
# Auto Exponential smoothing
AUTOETS = ETS(`Mean Temperature (°C)`)
) %>%
augment()
a <- autoplot(residual, .innov) +
labs(title = "Residual Plot") +
theme_minimal()
ggplotly(a)6.4 Refit to Full Dataset & Forecast Forward
# Refit models to the full dataset
full_fit <- suppressWarnings(filled_data %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(stl_formula_default, NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(stl_formula_default, ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(stl_formula_default, ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(`Mean Temperature (°C)`),
# AUTO prophet
AUTOprophet = prophet(`Mean Temperature (°C)`),
# Auto Exponential smoothing
AUTOETS = ETS(`Mean Temperature (°C)`)
)
)###Forecast 1 month
# Define forecast horizon as an integer (e.g., 30 days instead of "1 month")
future_horizon <- 30 # Adjust this number based on your data frequency (e.g., 30 for daily, 4 for weekly)
# Generate forecasts (with warning suppression if needed)
full_forecast <- suppressWarnings(
forecast(full_fit, h = future_horizon)
)full_forecast_df <- as_tibble(full_forecast)
full_forecast_df <- full_forecast_df %>%
mutate(.mean = round(.mean, 2)) %>%
mutate(n=n(), sd=sd(.mean)) %>%
mutate(se=sd/sqrt(n-1)) %>%
mutate(Lower = round(.mean - (1.96 * se), 2),
Upper = round(.mean + (1.96 * se), 2))# Initialize an empty plotly object
p <- plot_ly()
# Unique models for iteration and color assignment
unique_models <- unique(full_forecast_df$.model)
colors <- RColorBrewer::brewer.pal(n = length(unique_models), name = "Set1")
# Loop through each model to add to the plot
for (i in seq_along(unique_models)) {
model_name <- unique_models[i]
# Filter data for the current model
model_data <- filter(full_forecast_df, .model == model_name)
# Define the custom hovertemplate for lines
hovertemplate_line <- paste(
"Date: %{x}<br>",
"Mean Temperature (°C): %{y:.2f}<br>",
"Model: ", model_name, "<br>",
"95% CI: [%{customdata[0]:.2f}, %{customdata[1]:.2f}]<extra></extra>"
)
# Customdata for the line (lower and upper CI values)
custom_data <- mapply(function(lower, upper) list(lower, upper), model_data$Lower, model_data$Upper, SIMPLIFY = FALSE)
# Add forecast line with custom tooltip and legend grouping
p <- add_lines(p, data = model_data, x = ~Date, y = ~`.mean`, name = model_name,
line = list(color = colors[i]), hovertemplate = hovertemplate_line,
customdata = custom_data, legendgroup = model_name)
# Add confidence interval ribbon with legend grouping, but no separate legend entry
p <- add_ribbons(p, data = model_data, x = ~Date, ymin = ~Lower, ymax = ~Upper,
fillcolor = scales::alpha(colors[i], 0.2), line = list(color = "transparent"),
legendgroup = model_name, showlegend = FALSE, hoverinfo = "skip")
}
# Customize layout
p <- p %>% layout(title = "Future Forecast Plot with 95% CI for daily mean temperature of Ang Mo Kio Station",
xaxis = list(title = "Date"),
yaxis = list(title = "Mean Temperature (°C)"),
legend = list(title = list(text = 'Model')),
hovermode = 'closest')
# Display the plot
p# Table view
col<- full_forecast_df %>%
select(.model, Date, .mean) %>%
rename(Forecast = .mean) %>%
mutate(Forecast = round(Forecast, 2),
.model = as.factor(.model))
datatable(col,
class= "hover",
rownames = FALSE,
filter = 'top', # Enables filters at the top of each column
width="100%",
options = list(pageLength = 6, scrollX = TRUE))7. Time Series Forecasting [Temperature-weekly average]
summarised_temp_week# A tsibble: 4,420 x 4 [1W]
# Key: Station [17]
Station year_week avg_temp na.rm
<chr> <week> <dbl> <lgl>
1 Admiralty 2020 W01 27.7 TRUE
2 Admiralty 2020 W02 27.8 TRUE
3 Admiralty 2020 W03 27.5 TRUE
4 Admiralty 2020 W04 27.4 TRUE
5 Admiralty 2020 W05 27.3 TRUE
6 Admiralty 2020 W06 28.4 TRUE
7 Admiralty 2020 W07 27.7 TRUE
8 Admiralty 2020 W08 27.3 TRUE
9 Admiralty 2020 W09 27.7 TRUE
10 Admiralty 2020 W10 28.5 TRUE
# ℹ 4,410 more rows
summarised_temp_week_sstn <- summarised_temp_week %>%
filter(Station == "Ang Mo Kio") %>%
as_tsibble(index = year_week) %>%
fill_gaps() %>%
mutate(
avg_temp = imputeTS::na_interpolation(avg_temp)
)# Define the split point; for example, keeping the first 80% of rows for training
split_point <- nrow(summarised_temp_week_sstn ) * 0.8
# Create the training dataset (first 80% of the data)
train_temp_week <-summarised_temp_week_sstn %>%
slice(1:floor(split_point))
# Create the test dataset (remaining 20% of the data)
test_temp_week <-summarised_temp_week_sstn %>%
slice((floor(split_point) + 1):n())7.1 Split data into training and testing
train_temp_week_fit <- train_temp_week %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(STL(avg_temp), NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(STL(avg_temp), ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(STL(avg_temp), ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(avg_temp),
# AUTO prophet
AUTOprophet = prophet(avg_temp),
# Auto Exponential smoothing
AUTOETS = ETS(avg_temp)
)
forecast_horizon <- nrow(test_temp_week)
# Forecasting
train_temp_week_fc <- forecast(train_temp_week_fit, h = forecast_horizon)# Plotting the forecasts
c <- autoplot(train_temp_week, avg_temp) +
autolayer(test_temp_week, avg_temp, series = "Test Data") +
autolayer(train_temp_week_fc, level = NULL) +
labs(title = "Forecast Validation for weekly mean temperature of Ang Mo Kio Station") +
theme_minimal()
ggplotly(c, tooltip = c("x", "y", ".model"))7.3 Testing set forecast & Accuracy Evaluation
accuracy_metrics <- accuracy(train_temp_week_fc, test_temp_week) %>%
arrange(.model) %>%
select(.model, .type, RMSE, MAE, MAPE, MASE) %>%
mutate(across(c(RMSE, MAE, MAPE, MASE), round, 2))
datatable(accuracy_metrics,
class= "hover",
rownames = FALSE,
width="100%",
options = list(pageLength = 10,scrollX=T))7.3.1 Residual plot
residual <- train_temp_week %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(STL(avg_temp), NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(STL(avg_temp), ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(STL(avg_temp), ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(avg_temp),
# AUTO prophet
AUTOprophet = prophet(avg_temp),
# Auto Exponential smoothing
AUTOETS = ETS(avg_temp)
)%>%
augment()
a <- autoplot(residual, .innov) +
labs(title = "Residual Plot") +
theme_minimal()
ggplotly(a)7.4 Refit to Full Dataset & Forecast Forward
# Refit models to the full dataset
full_temp_week_fit <- summarised_temp_week_sstn %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(STL(avg_temp), NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(STL(avg_temp), ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(STL(avg_temp), ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(avg_temp),
# AUTO prophet
AUTOprophet = prophet(avg_temp),
# Auto Exponential smoothing
AUTOETS = ETS(avg_temp)
)# Plotting the forecasts along with the full dataset
future_horizon <- "10 week" # Adjust this to your forecast needs
full_temp_week_forecast <- forecast(full_temp_week_fit, h = future_horizon)e <- autoplot(full_temp_week_forecast, level = 95) +
labs(title = "Future Forecast Plot for weekly mean temperature of Ang Mo Kio Station") +
theme_minimal()
ggplotly(e, tooltip = c("x", "y",".model"))8. Time Series Forecasting [Rainfall-weekly total]
8.1 Split data into training and testing
summarised_rain_week# A tsibble: 4,420 x 4 [1W]
# Key: Station [17]
Station year_week total_rainfall na.rm
<chr> <week> <dbl> <lgl>
1 Admiralty 2020 W01 9 TRUE
2 Admiralty 2020 W02 26 TRUE
3 Admiralty 2020 W03 1.4 TRUE
4 Admiralty 2020 W04 24.6 TRUE
5 Admiralty 2020 W05 94 TRUE
6 Admiralty 2020 W06 26.8 TRUE
7 Admiralty 2020 W07 14.8 TRUE
8 Admiralty 2020 W08 6.8 TRUE
9 Admiralty 2020 W09 0.4 TRUE
10 Admiralty 2020 W10 14.6 TRUE
# ℹ 4,410 more rows
summarised_rain_week_sstn <- summarised_rain_week %>%
filter(Station == "Ang Mo Kio") %>%
as_tsibble(index = year_week) %>%
fill_gaps() %>%
mutate(
total_rainfall = imputeTS::na_interpolation(total_rainfall)
)# Define the split point; for example, keeping the first 80% of rows for training
split_point <- nrow(summarised_rain_week_sstn) * 0.8
# Create the training dataset (first 80% of the data)
train_rain_week <- summarised_rain_week_sstn %>%
slice(1:floor(split_point))
# Create the test dataset (remaining 20% of the data)
test_rain_week <- summarised_rain_week_sstn %>%
slice((floor(split_point) + 1):n())8.2 Create and fit multiple model to tesing set
train_rain_week_fit <- train_rain_week %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(STL(total_rainfall), NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(STL(total_rainfall), ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(STL(total_rainfall), ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(total_rainfall),
# AUTO prophet
AUTOprophet = prophet(total_rainfall),
# Auto Exponential smoothing
AUTOETS = ETS(total_rainfall)
)
forecast_horizon <- nrow(test_rain_week)
# Forecasting
train_rain_week_fc <- forecast(train_rain_week_fit, h = forecast_horizon)# Plotting the forecasts
c <- autoplot(train_rain_week, total_rainfall) +
autolayer(test_rain_week, total_rainfall, series = "Test Data") +
autolayer(train_rain_week_fc, level = NULL) +
labs(title = "Forecast Validation for weekly total rainfall of Ang Mo Kio Station") +
theme_minimal()
ggplotly(c, tooltip = c("x", "y", ".model"))8.3 Testing set forcast & Accuracy Evaluation
accuracy_metrics <- accuracy(train_rain_week_fc, test_rain_week) %>%
arrange(.model) %>%
select(.model, .type, RMSE, MAE, MAPE, MASE) %>%
mutate(across(c(RMSE, MAE, MAPE, MASE), round, 2))
datatable(accuracy_metrics,
class= "hover",
rownames = FALSE,
width="100%",
options = list(pageLength = 10,scrollX=T))8.3.1 Residual plot
residual <- train_rain_week %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(STL(total_rainfall), NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(STL(total_rainfall), ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(STL(total_rainfall), ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(total_rainfall),
# AUTO prophet
AUTOprophet = prophet(total_rainfall),
# Auto Exponential smoothing
AUTOETS = ETS(total_rainfall)
)%>%
augment()
a <- autoplot(residual, .innov) +
labs(title = "Residual Plot") +
theme_minimal()
ggplotly(a)8.4 Refit to Full Dataset & Forecast Forward
# Refit models to the full dataset
full_rain_week_fit <- summarised_rain_week_sstn %>%
model(
# naïve forecast of the seasonally adjusted data
STLNaive = decomposition_model(STL(total_rainfall), NAIVE(season_adjust)),
# auto arima forecast of the seasonally adjusted data
STLArima = decomposition_model(STL(total_rainfall), ARIMA(season_adjust)),
# Exponential Smoothing forecast of the seasonally adjusted data
STLETS = decomposition_model(STL(total_rainfall), ETS(season_adjust ~ season("N"))),
# AUTO arima
AUTOARIMA = ARIMA(total_rainfall),
# AUTO prophet
AUTOprophet = prophet(total_rainfall),
# Auto Exponential smoothing
AUTOETS = ETS(total_rainfall)
)# Plotting the forecasts along with the full dataset
future_horizon <- "10 week" # Adjust this to your forecast needs
full_rain_week_forecast <- forecast(full_rain_week_fit, h = future_horizon)e <- autoplot(full_rain_week_forecast, level = 95) +
labs(title = "Future Forecast Plot for weekly total rainfall of Ang Mo Kio Station") +
theme_minimal()
ggplotly(e, tooltip = c("x", "y",".model"))full_rain_week_forecast_tibble <- as_tibble(full_rain_week_forecast)
col <- full_rain_week_forecast_tibble %>%
mutate(year_week = paste(year(year_week), "w",
sprintf("%02d", isoweek(year_week)), sep="")) %>%
select(.model, year_week, .mean) %>%
rename(Forecast = .mean) %>%
mutate(Forecast = round(Forecast, 2),
.model = as.factor(.model),
year_week = as.factor(year_week))
datatable(col,
class= "hover",
rownames = FALSE,
filter = 'top',
width="100%",
options = list(pageLength = 6, scrollX = TRUE))